Run UMAP on CD4+ events which express a COMPASS subset. Repeat for CD8 events.
The question being asked is “What are the memory and activation profiles of Ag-specific T cells?”

This time around, don’t sample the events.
Don’t stratify by groups, but rather color the sub-localization of the different markers.
Color by Cohort and Antigen (S1, S2, NCAP, VEMP, including DMSO)
Also color by
- Degree of functionality
- Cytokine - CD45RA
- CCR7
- HLA-DR
- CD38
Boxplots?

library(openCyto)
library(CytoML) # 1.12.0
library(flowCore) # required for description()
library(flowWorkspace) # required for gh_pop_get_data()
library(here)
library(tidyverse)
library(uwot)
library(ggplot2)
library(scales)
library(patchwork)
library(hues)
library(RColorBrewer)
library(ggrepel)
library(ggpubr)
library(tidyselect)
source(here::here("scripts/20200604_Helper_Functions.R")) # for distributeEvents() and sampleGatingHierarchy()
date <- 20200814
save_output <- TRUE
rerun_dimred <- FALSE

stims_for_compass_runs <- c("VEMP", "Spike 1", "Spike 2", "NCAP")
parent_nodes_for_compass_runs <- c("4+", "NOT4+", "8+")
stims_for_compass_runs_rep <- rep(stims_for_compass_runs, each = length(parent_nodes_for_compass_runs))
parent_nodes_for_compass_runs_rep <- rep(parent_nodes_for_compass_runs, times = length(stims_for_compass_runs))

crList <- purrr::pmap(.l = list(parent_nodes_for_compass_runs_rep,
                                stims_for_compass_runs_rep),
                      .f = function(parent, currentStim) {
                        currentStimForFile <- gsub(" ", "_", currentStim)
                        crPath <- here::here(sprintf("out/CompassOutput/%s/%s/COMPASSResult_%s_%s.rds", parent, currentStimForFile, parent, currentStimForFile))
                        readRDS(crPath)
                      }) %>% 
  set_names(gsub("+", "", gsub(" ", "_", paste0(parent_nodes_for_compass_runs_rep, "_", stims_for_compass_runs_rep)), fixed=TRUE))
names(crList)
##  [1] "4_VEMP"       "NOT4_VEMP"    "8_VEMP"       "4_Spike_1"    "NOT4_Spike_1"
##  [6] "8_Spike_1"    "4_Spike_2"    "NOT4_Spike_2" "8_Spike_2"    "4_NCAP"      
## [11] "NOT4_NCAP"    "8_NCAP"
crList.no_healthy <- lapply(names(crList),
                  function(cr_name) {
                    cr <- crList[[cr_name]]
                    print(sprintf("Accessing %s", cr_name))
                    cr$data$meta <- cr$data$meta %>% 
                      dplyr::filter(!(`SAMPLE ID` %in% setdiff(cr$data$meta$`SAMPLE ID`, rownames(cr$fit$mean_gamma)))) %>% 
                      mutate(Cohort = factor(ifelse(Cohort %in%
                                                c(NA, "Healthy control", "Healthy control 2017-2018"), "Healthy", Cohort),
                                              levels = rev(c("Healthy", "Non-hospitalized", "Hospitalized"))))
                    stopifnot(all.equal(rownames(cr$fit$mean_gamma), cr$data$meta$`SAMPLE ID`))
                    
                    stopifnot(all.equal(rownames(cr$data$n_s), rownames(cr$fit$mean_gamma)))
                    stopifnot(all.equal(rownames(cr$data$n_u), rownames(cr$fit$mean_gamma)))
                    stopifnot(all.equal(names(cr$data$counts_s), rownames(cr$fit$mean_gamma)))
                    stopifnot(all.equal(names(cr$data$counts_u), rownames(cr$fit$mean_gamma)))
                    
                    not_healthy_idx <- which(cr$data$meta$Cohort != "Healthy")
                    cr$data$meta <- cr$data$meta[not_healthy_idx,] %>% 
                      mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")))
                    cr$fit$mean_gamma <- cr$fit$mean_gamma[not_healthy_idx,]

                    cr$data$n_s <- cr$data$n_s[not_healthy_idx,]
                    cr$data$n_u <- cr$data$n_u[not_healthy_idx,]
                    cr$data$counts_s <- cr$data$counts_s[not_healthy_idx]
                    cr$data$counts_u <- cr$data$counts_u[not_healthy_idx]
                    
                    cr
                  })
## [1] "Accessing 4_VEMP"
## [1] "Accessing NOT4_VEMP"
## [1] "Accessing 8_VEMP"
## [1] "Accessing 4_Spike_1"
## [1] "Accessing NOT4_Spike_1"
## [1] "Accessing 8_Spike_1"
## [1] "Accessing 4_Spike_2"
## [1] "Accessing NOT4_Spike_2"
## [1] "Accessing 8_Spike_2"
## [1] "Accessing 4_NCAP"
## [1] "Accessing NOT4_NCAP"
## [1] "Accessing 8_NCAP"
names(crList.no_healthy) <- names(crList)

CD4RunNames <- c("4_Spike_1", "4_Spike_2", "4_NCAP", "4_VEMP")
NotCD4RunNames <- c("NOT4_Spike_1", "NOT4_Spike_2", "NOT4_NCAP", "NOT4_VEMP")
CD8RunNames <- c("8_Spike_1", "8_Spike_2", "8_NCAP", "8_VEMP")

gs <- load_gs(here::here("out/GatingSets/20200805_HAARVI_ICS_GatingSet_AllBatches"))
## loading R object...
## loading tree object...
## Done
pData(gs)$Batch <- ifelse(pData(gs)$`EXPERIMENT NAME` == "20200605_COVID_ICS-B3", 3, pData(gs)$Batch)
gs2 <- subset(gs, !(`SAMPLE ID` %in% c("37C", "BWT23", "116C", "BWT22")) &
                !(`SAMPLE ID` == "551432" & STIM == "Spike 2"))
dput(gh_get_pop_paths(gs2))
## c("root", "/Time", "/Time/LD-3+", "/Time/LD-3+/1419-3+", "/Time/LD-3+/1419-3+/S", 
## "/Time/LD-3+/1419-3+/S/Lymph", "/Time/LD-3+/1419-3+/S/Lymph/4+", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/CD38+", 
## "/Time/LD-3+/1419-3+/S/Lymph/HLADR+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/154", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CD45RA+", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+/107a", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/154", "/Time/LD-3+/1419-3+/S/Lymph/8+/IL2", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/8+/IL4513", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+/CCR7+", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD45RA+")

CD4

Add COMPASS boolean gates

# Add the CD4+ COMPASS boolean cytokine subset gates to the GatingSet

cd4_categories <- unique(do.call(rbind, lapply(CD4RunNames, function(runName) {
  # Filter the subsets to those where the average mean_gamma is greater than the threshold (default in heatmap and this function is 0.01)
  filteredSubsetIndicesCurrentRun <- which(apply(crList.no_healthy[[runName]]$fit$mean_gamma, 2, function(x) { mean(x, na.rm = TRUE) }) > 0.01) # names()
  # TODO: Assuming here that mean_gamma colnames are in same order as categories rows. Should do formal check.
  crList.no_healthy[[runName]]$data$categories[filteredSubsetIndicesCurrentRun,]
})))
# Remove the category with zero positive cytokines
cd4_categories <- cd4_categories[-which(cd4_categories[,"Counts"] == 0),]

knitr::kable(cd4_categories)
IL2 IL4/5/13 IFNg TNFa IL17a CD154 CD107a Counts
0 1 0 0 0 0 0 1
0 0 0 0 0 1 0 1
1 1 0 0 0 0 0 2
1 0 0 0 0 1 0 2
0 0 1 0 0 1 0 2
0 0 0 1 0 1 0 2
1 0 1 1 0 0 0 3
1 0 1 0 0 1 0 3
1 0 0 1 0 1 0 3
0 0 1 1 0 1 0 3
1 1 0 1 0 1 0 4
1 0 1 1 0 1 0 4
1 1 1 1 0 1 0 5
1 0 1 1 0 1 1 5
0 0 0 1 0 0 0 1
0 0 0 0 0 0 1 1
0 0 1 1 0 0 0 2
1 0 0 0 0 0 0 1
0 0 1 0 0 0 0 1
0 0 0 0 1 0 0 1
0 1 0 0 1 0 0 2
1 1 1 0 0 0 0 3
mapMarkers <- c("IL2", "IL4/5/13", "IFNg", "TNFa", "IL17a", "CD154", "CD107a")
cd4NodeMarkerMap <- mapMarkers
# NodeMarkerMap names are gating tree paths
names(cd4NodeMarkerMap) <- paste0("4+", "/", c("IL2", "IL4513", "IFNG", "TNF", "IL17", "154", "107a"))

cd4_cats_mod <- as.data.frame(cd4_categories) %>% 
  dplyr::select(-Counts) %>% 
  mutate_all(~ recode(., "0" = "!", "1" = "")) %>% 
  dplyr::rename_at(all_of(cd4NodeMarkerMap), ~ names(cd4NodeMarkerMap))
cd4_booleanSubsets <- cd4_cats_mod %>% 
  rowwise() %>% 
  do(booleanSubset = paste(paste0(., colnames(cd4_cats_mod)), collapse="&")) %>% 
  ungroup() %>% 
  dplyr::pull(booleanSubset) %>% 
  unlist()
names(cd4_booleanSubsets) <- paste0("CD4_", gsub("4\\+\\/", "", gsub("\\&", "_AND_", gsub("\\!", "NOT_", cd4_booleanSubsets))))
for(booleanSubsetName in names(cd4_booleanSubsets)) {
  # booleanSubset The booleanSubset (a combination of existing gates) in string format, e.g. "8+/GMM+&!8+/GAMMADELTA"
  call <- substitute(flowWorkspace::booleanFilter(v), list(v = as.symbol(cd4_booleanSubsets[[booleanSubsetName]])))
  g <- eval(call)
  suppressWarnings(flowWorkspace::gs_pop_add(gs2, g, parent = "4+", name=booleanSubsetName))
}

Extract mfi data

cd4_gates_for_dimred <- c(
  "/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154", 
  "/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2", 
  "/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513", 
  "/Time/LD-3+/1419-3+/S/Lymph/4+/TNF",
  "/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+",
  "/Time/LD-3+/1419-3+/S/Lymph/CD38+", "/Time/LD-3+/1419-3+/S/Lymph/HLADR+")
cd4_cytokine_gates <- c("/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154", 
                        "/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2", 
                        "/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513", 
                        "/Time/LD-3+/1419-3+/S/Lymph/4+/TNF")

Still need to add boolean gates.

gs_pop_add(gs2, eval(substitute(flowWorkspace::booleanFilter(v),
                                       list(v = as.symbol(paste(names(cd4_booleanSubsets), collapse="|"))))),
           parent = "/Time/LD-3+/1419-3+/S/Lymph/4+", name = "CD4_COMPASS_Subsets")
## [1] 61
recompute(gs2, "/Time/LD-3+/1419-3+/S/Lymph/4+")
## .........................................................................................................................................................................................................................................................................................................................................................................................done!
cd4_compass_subsets_parentGate <- "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets"

pop_counts <- pData(gs2) %>% 
  left_join(gs_pop_get_count_fast(gs2, subpopulations = cd4_compass_subsets_parentGate),
            by = c("rowname" = "name")) %>% 
  dplyr::rename(CD4_COMPASS_Subsets = Count) %>% 
  dplyr::select(rowname, Batch, "SAMPLE ID", STIM, Cohort, CD4_COMPASS_Subsets) %>% 
  dplyr::filter(!(Cohort %in% c(NA, "Healthy control", "Healthy control 2017-2018")) & STIM != "SEB")

Keep in mind that there is lopsided patient and group representation simply due to not sampling:

cd4_compass_subsets_sampleSizes_4plot <- pop_counts %>%
  mutate(Cohort = factor(Cohort,
                         levels = c("Non-hospitalized", "Hospitalized"),
                         labels = c("Conv\nNon-Hosp", "Conv\nHosp")))
ggplot(cd4_compass_subsets_sampleSizes_4plot,
       aes(factor(Cohort), CD4_COMPASS_Subsets)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.15, height = 0) +
  theme_bw(base_size=20) +
  labs(title="ICS CD4 UMAP patient representation",
       y="CD4+ COMPASS Subset+ Events\n for Dimensionality Reduction\n(not sampled)") +
  facet_grid(Batch ~ STIM) +
  theme(axis.title.x = element_blank())

# Extract data for dimensionality reduction (not actually sampling)
call_sampleGatingHierarchy_for_cd4 <- function(currentSampleName) {
  # print(sprintf("Sampling data from %s", currentSampleName))
  sampleGatingHierarchy(gs2[[currentSampleName]], cd4_compass_subsets_parentGate, n = NULL, otherGates = cd4_gates_for_dimred)
}
cd4_compass_subsets_data <- map_dfr(pop_counts$rowname, call_sampleGatingHierarchy_for_cd4)
dim(cd4_compass_subsets_data)
## [1] 147945     55
knitr::kable(head(cd4_compass_subsets_data))
name EXPERIMENT NAME $DATE SAMPLE ID PATIENT ID STIM WELL ID PLATE NAME filename rowname Sample ID Collection date Cell count Cohort Age Sex Race Hispanic? Days symptom onset to visit 1 Pair ID Race_v2 Batch /Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets /Time/LD-3+/1419-3+/S/Lymph/4+/107a /Time/LD-3+/1419-3+/S/Lymph/4+/154 /Time/LD-3+/1419-3+/S/Lymph/4+/IFNG /Time/LD-3+/1419-3+/S/Lymph/4+/IL2 /Time/LD-3+/1419-3+/S/Lymph/4+/IL17 /Time/LD-3+/1419-3+/S/Lymph/4+/IL4513 /Time/LD-3+/1419-3+/S/Lymph/4+/TNF /Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+ /Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+ /Time/LD-3+/1419-3+/S/Lymph/CD38+ /Time/LD-3+/1419-3+/S/Lymph/HLADR+ Time FSC-A FSC-H SSC-A SSC-H CD8b TNFa CD107a CD154 CD3 ECD IL2 CD4 IL17a IL4/5/13 CD14/CD19 CCR7 CD38 L/D IFNg CD45RA HLADR
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 0 0 0 0 1 0 0 0 0 1.502 115906.84 96585 27485.91 27497 1064.5720 1440.4530 393.4271 648.1245 2909.432 619.5652 1705.126 861.0718 777.4784 1025.7701 625.7065 631.8912 1210.5221 965.0119 1267.662 499.4432
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 0 0 1 0 0 1 1 1 0 1.525 88823.48 78464 17823.69 16608 1045.8657 619.7026 516.1617 1140.8926 3061.690 651.8842 1890.087 1941.8019 452.2898 784.3320 2844.8445 2105.2791 1321.1499 592.8734 2671.356 315.9288
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 0 0 1 0 0 1 1 1 0 1.869 76075.24 61697 15965.37 15489 1093.0385 613.5021 554.5132 790.8342 2874.763 235.4683 1892.790 1955.9308 589.2085 1058.5559 2115.3691 2029.1339 1105.2609 319.0016 3293.626 899.7007
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 0 0 0 1 0 1 1 0 0 2.011 50676.04 36166 13068.27 12006 337.6944 592.8991 446.3646 816.1756 2913.806 592.0229 1654.902 486.0011 2992.3889 979.4083 2385.1418 1194.2875 716.8176 820.3757 2494.246 561.7028
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 0 0 1 0 0 1 0 0 0 2.129 82473.68 64294 24553.14 23922 1192.3547 196.5198 658.7712 895.1425 2895.531 739.8292 1828.663 1943.6406 652.2826 266.8253 2725.9609 954.5881 1382.0779 331.3962 1562.413 484.5808
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 0 0 0 0 1 1 0 0 0 2.277 95741.76 77970 31374.81 30438 404.7302 2342.6272 421.5775 1543.7673 2929.278 1151.6838 1943.653 1273.3855 859.4286 847.0671 1992.1693 1230.5979 1355.7234 390.6719 1200.687 458.7832

Is there maybe one cytokine that is dominating the entire sample?
If one cytokine has very high background expression (and a generous gate), it could be gated positive in a lot of events.
The high number of events expressing this cytokine could lead to it dominating the data, so that most sampled events are positive for this noisy cytokine. It would drown out real signal from other cytokines.

cytokine_dominance <- cd4_compass_subsets_data %>% 
  group_by(Batch) %>% 
  summarise_at(cd4_cytokine_gates, sum) %>%
  t() %>%
  as.data.frame() %>%
  set_names(c("B1", "B2", "B3")) %>%
  rownames_to_column("Cytokine_Gate") %>%
  dplyr::filter(Cytokine_Gate != "Batch")
knitr::kable(cytokine_dominance)
Cytokine_Gate B1 B2 B3
/Time/LD-3+/1419-3+/S/Lymph/4+/107a 10571 15787 12987
/Time/LD-3+/1419-3+/S/Lymph/4+/154 10647 12296 12535
/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG 7346 4490 5410
/Time/LD-3+/1419-3+/S/Lymph/4+/IL2 10328 8292 9823
/Time/LD-3+/1419-3+/S/Lymph/4+/IL17 15257 1136 14258
/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513 6356 9478 10858
/Time/LD-3+/1419-3+/S/Lymph/4+/TNF 11204 11027 14674
cytokine_dominance %>%
  pivot_longer(cols = starts_with("B"), names_to = "Batch", values_to = "Events_in_Gate") %>% 
  mutate(Cytokine = sub(".*4\\+\\/(.*)", "\\1", Cytokine_Gate)) %>% 
  ggplot(aes(Cytokine, Events_in_Gate, fill = Cytokine)) +
  theme_bw(base_size=18) +
  geom_bar(stat="identity") +
  facet_grid(. ~ Batch) +
  labs(title = "CD4 Run Cytokine Dominance by Batch")

Perform Dimensionality Reduction

cols_4_dimred <- c("CD3 ECD", "CD8b", "CD4",
                  "TNFa", "CD107a", 
                  "CD154", "IL2", "IL17a", 
                  "IL4/5/13", "IFNg", 
                  "CCR7", "CD45RA",
                  "CD38", "HLADR")
cd4.scaled_dimred_input <- cd4_compass_subsets_data %>% 
  dplyr::select(Batch, all_of(cols_4_dimred)) %>% 
  group_by(Batch) %>% 
  nest() %>% 
  ungroup() %>% 
  mutate(data = lapply(data, function(df) {as.data.frame(scale(as.matrix(df)))})) %>% 
  unnest(cols = c(data)) %>% 
  rename_at(vars(all_of(cols_4_dimred)),function(x) paste0(x,".scaled")) %>% 
  dplyr::select(-Batch)

cd4_compass_subsets_data <- cbind(cd4_compass_subsets_data, cd4.scaled_dimred_input)

# UMAP can take a long time, so there is a rerun_dimred switch
if(rerun_dimred) {
  print("Running UMAP")
  set.seed(date)
  print(Sys.time())
  cd4_compass_subsets_dimred_out <- cd4_compass_subsets_data %>% 
    # Run CD3, co-receptor, cytokine, memory, and activation markers through UMAP
    dplyr::select(all_of(paste0(cols_4_dimred, ".scaled"))) %>% 
    uwot::umap(spread = 9, min_dist = 0.02, n_threads = 7)
  print(Sys.time())
  cd4_compass_subsets_w_umap <- cbind(as.data.frame(cd4_compass_subsets_dimred_out) %>% 
    dplyr::rename(x.umap = V1, y.umap = V2),
    cd4_compass_subsets_data)
  if(save_output) {
    saveRDS(cd4_compass_subsets_w_umap, here::here(sprintf("out/UMAP/%s_ICS_CD4_COMPASS_Subsets_UMAP_Unsampled.rds", date)))
  }
} else {
  # Assuming UMAP results are already saved
  print("Loading saved UMAP run")
  cd4_compass_subsets_w_umap <- readRDS(here::here(sprintf("out/UMAP/%s_ICS_CD4_COMPASS_Subsets_UMAP_Unsampled.rds", date)))
}
## [1] "Loading saved UMAP run"

Plot UMAP results

Shuffle data frame rows so e.g. Batch 3 doesn’t dominate foreground

set.seed(date)
cd4_compass_subsets_w_umap <- cd4_compass_subsets_w_umap[sample(nrow(cd4_compass_subsets_w_umap), nrow(cd4_compass_subsets_w_umap)),]
# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
                   metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
                               here::here("data/Arial_afm/ArialMT-Bold.afm"), 
                               here::here("data/Arial_afm/ArialMT-Italic.afm"),
                               here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)

boolColorScheme <- c("FALSE" = "#E2E2E2", "TRUE" = "#023FA5")

cd4_compass_subsets_w_umap <- cd4_compass_subsets_w_umap %>% 
  mutate(cytokine_degree = rowSums(dplyr::select(., all_of(cd4_cytokine_gates))))
cd4_compass_subsets_w_umap <- cd4_compass_subsets_w_umap %>% 
  mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")),
         STIM = factor(STIM, levels = c("DMSO", "Spike 1", "Spike 2", "NCAP", "VEMP")))

stim_labs <- c("DMSO", "S1", "S2", "NCAP", "VEMP") 
names(stim_labs) <- c("DMSO", "Spike 1", "Spike 2", "NCAP", "VEMP")
cohort_labs <- c("Conv\nNon-Hosp", "Conv\nHosp")
names(cohort_labs) <- c("Non-hospitalized", "Hospitalized")

base_dimred_plot <- function(currentColumn, pointSize = 0.02, colorScheme = NA) {
  p <- ggplot(cd4_compass_subsets_w_umap, aes(x=x.umap, y=y.umap,
                                       colour=if(currentColumn %in% c("Batch", "SAMPLE ID")) {
                                         factor(!!as.name(currentColumn))
                                         } else {
                                           as.logical(!!as.name(currentColumn))
                                           })) +
    geom_point(shape=20, alpha=0.8, size=pointSize) +
    facet_grid(Cohort ~ STIM, switch="y",
               labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
    theme_bw() +
    theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
          axis.text=element_blank(),
          axis.line=element_blank(),
          axis.ticks=element_blank(),
          axis.title=element_blank(),
          strip.text=element_text(face="bold", size=22),
          panel.grid.major = element_blank(),
          legend.title=element_text(face="bold", size=14),
          strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")))
  if(!anyNA(colorScheme)) {
    p <- p + scale_color_manual(values = colorScheme)
  }
  if(currentColumn %in% c("Batch", "SAMPLE ID")) {
    p <- p + labs(color = currentColumn) +
      guides(colour = guide_legend(override.aes = list(size=10))) +
      theme(legend.title = element_text(size=15),
            legend.text = element_text(size=15),
            legend.position = "bottom") +
      scale_colour_manual(values=as.character(iwanthue(length(unique(cd4_compass_subsets_w_umap[,currentColumn])))))
  } else {
    p <- p + theme(legend.position = "none")
  }
  p
}

Look for Batch and Patient sub-clustering

base_dimred_plot("Batch")

base_dimred_plot("SAMPLE ID")

Cytokine, Memory, and Activation expression localization

Now visualize the cytokine, memory, and activation expression localization

for(cg in cd4_gates_for_dimred) {
  print(base_dimred_plot(cg, colorScheme = boolColorScheme) +
    labs(title = sprintf("CD4+ COMPASS Subset+ UMAP\nColored by %s", sub(".*\\/([^(\\/)]+)", "\\1", cg))))
}

Color by degree

table(cd4_compass_subsets_w_umap$cytokine_degree)
## 
##      1      2      3      4      5 
## 113318  11137  14998   8286    206
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(1, 6))

currentColumn <- "cytokine_degree"
pointSize <- 0.02
ggplot(cd4_compass_subsets_w_umap, aes(x=x.umap, y=y.umap, colour=!!as.name(currentColumn))) +
  geom_point(shape=20, alpha=0.8, size=pointSize) +
  facet_grid(Cohort ~ STIM, switch="y",
             labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
  labs(colour="Cytokine\nDegree",
       title="CD4+ COMPASS Subset+ UMAP\nColored by Cytokine Polyfunctionality") +
  theme_bw() +
  theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
        axis.text=element_blank(),
        axis.line=element_blank(),
        axis.ticks=element_blank(),
        axis.title=element_blank(),
        strip.text=element_text(face="bold", size=22),
        panel.grid.major = element_blank(),
        legend.title=element_text(face="bold", size=15),
        legend.text = element_text(size=13),
        strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")),
        legend.position = "bottom") +
  sc

Unstratified UMAP plots

pointSize <- 0.02
# Theme settings
# Expand axis limits so so contour lines don't get cut off near borders
gg_themes <- ggplot(cd4_compass_subsets_w_umap, aes(x=x.umap, y=y.umap)) +
  scale_x_continuous(limits=range(cd4_compass_subsets_w_umap$x.umap)*1.05) +
  scale_y_continuous(limits=range(cd4_compass_subsets_w_umap$y.umap)*1.05) +
  theme_bw() +
  theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
        axis.text=element_blank(),
        axis.line=element_blank(),
        axis.ticks=element_blank(),
        axis.title=element_blank(),
        panel.grid = element_blank(),
        legend.position = "none",
        plot.margin = margin(1, 0, 0, 0, unit = "pt"))
# Hospitalization status contour plots
hosp_contour_plot <- gg_themes +
  geom_point(shape=20, alpha=0.8, size=pointSize, colour="#E2E2E2") +
  geom_density_2d(data=cd4_compass_subsets_w_umap %>% dplyr::filter(Cohort == "Hospitalized"),
                  colour="#363636", bins=12) + # "red"
  ggtitle("Hosp")

nonhosp_contour_plot <- gg_themes +
  geom_point(shape=20, alpha=0.8, size=pointSize, colour="#E2E2E2") +
  geom_density_2d(data=cd4_compass_subsets_w_umap %>% dplyr::filter(Cohort == "Non-hospitalized"),
                  colour="#363636", bins=12) + # "blue"
  ggtitle("Not-Hosp")
# Scaled mfi expression plots for memory, activation, and cytokine markers 
mfi_col_text_4plot <- c("TNFa" = "TNF", "CD107a" = "CD107", 
                  "CD154" = "CD154", "IL2" = "IL-2", "IL17a" = "IL17a", 
                  "IL4/5/13" = "IL-4/5/13", "IFNg" = "IFN-γ", 
                  "CCR7" = "CCR7", "CD45RA" = "CD45RA",
                  "CD38" = "CD38", "HLADR" = "HLA-DR")
plot_scaled_mfi_v2 <- function(currentColumn) {
  myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
  sc_mfi <- scale_colour_gradientn(colours = myPalette(100), oob=squish, limits=c(-2.5, 2.5))
  gg_themes +
    geom_point(aes(colour=cd4_compass_subsets_w_umap[,paste0(currentColumn, ".scaled")]),
               shape=20, alpha=0.8, size=pointSize) + # !!as.name(paste0(currentColumn, ".scaled")) doesn't work
    sc_mfi +
    ggtitle(mfi_col_text_4plot[[currentColumn]])
}

unstratified_mfi_plot_cols <- c("TNFa", "CD107a", 
                  "CD154", "IL2", "IL17a", 
                  "IL4/5/13", "IFNg", 
                  "CCR7", "CD45RA",
                  "CD38", "HLADR")
mfi_plots_unstrat <- lapply(unstratified_mfi_plot_cols, plot_scaled_mfi_v2)
names(mfi_plots_unstrat) <- unstratified_mfi_plot_cols
# Memory UMAP plot colored by gate membership (TEMRA, TEM, TCM, Naive)
getMemoryCategory <- function(CD45RA, CCR7) {
  if(CD45RA & CCR7) {
    "Naive"
  } else if(CD45RA & !CCR7) {
    "TEMRA"
  } else if(!CD45RA & CCR7) {
    "TCM"
  } else if(!CD45RA & !CCR7) {
    "TEM"
  } else {
    "Uncategorized"
  }
}

cd4_compass_subsets_w_umap$MemoryCategory <- pmap_chr(.l = list(cd4_compass_subsets_w_umap$`/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+`,
                                                               cd4_compass_subsets_w_umap$`/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+`),
                                                     .f = getMemoryCategory)

memColorScheme <- c("TEM" = "#fc8d62", "TEMRA" = "#66c2a5", "TCM" = "#e78ac3", "Naive" = "#8da0cb")
# c("TEM" = "#1b9e77", "TEMRA" = "#7570b3", "TCM" = "#e7298a", "Naive" = "#d95f02")
# c("TEM" = "#d7191c", "TEMRA" = "#2c7bb6", "TCM" = "green3", "Naive" = "darkorange", "Uncategorized" = "#90c9dd")
mem_plot_colored_by_gate <- gg_themes +
  geom_point(aes(colour=cd4_compass_subsets_w_umap[,"MemoryCategory"]),
               shape=20, alpha=0.8, size=pointSize) +
  ggtitle("Memory") +
  scale_color_manual(values = memColorScheme)
# + theme(legend.position = "right") + guides(colour = guide_legend(override.aes = list(size=9)))
# Activation plots: color by gate membership, one plot per marker
boolColorScheme <- c("0" = "#E2E2E2", "1" = "#363636")
hladr_bool_plot <- gg_themes +
  geom_point(aes(colour=factor(cd4_compass_subsets_w_umap[,"/Time/LD-3+/1419-3+/S/Lymph/HLADR+"], levels = c(1, 0))),
               shape=20, alpha=0.8, size=pointSize) +
  scale_color_manual(values = boolColorScheme, labels=c("1"="Expressed", "0"="Not Expressed")) +
  ggtitle("HLA-DR")
cd38_bool_plot <- gg_themes +
  geom_point(aes(colour=factor(cd4_compass_subsets_w_umap[,"/Time/LD-3+/1419-3+/S/Lymph/CD38+"], levels = c(1, 0))),
               shape=20, alpha=0.8, size=pointSize) +
  scale_color_manual(values = boolColorScheme, labels=c("1"="Expressed", "0"="Not Expressed")) +
  ggtitle("CD38")
# factor_colors = hsv((seq(0, 1, length.out = 4 + 1)[-1] +
#                          0.2)%%1, 0.7, 0.95)
# 
# stim_colors <- c("Spike 1" = "#fc68be", "Spike 2" = "#d94e09", "NCAP" = "#6B49F2", "VEMP" = "#D0F249", "DMSO" = "#91570a") # heatmap colors
# stim_colors <- c("Spike 1" = "#49F2BF", "Spike 2" = "#F2497C", "NCAP" = "#6B49F2", "VEMP" = "#D0F249", "DMSO" = "#91570a")
stim_abbreviations = c("Spike 1" = "S1", "Spike 2" = "S2", "NCAP" = "NCAP", "VEMP" = "VEMP", "DMSO" = "DMSO")

plot_stim_contour_plot <- function(currentStim) {
  gg_themes +
    geom_point(shape=20, alpha=0.8, size=pointSize, colour="#E2E2E2") +
    geom_density_2d(data=cd4_compass_subsets_w_umap %>% dplyr::filter(STIM == currentStim),
                    colour="#363636", bins=12) + # stim_colors[[currentStim]]
    ggtitle(stim_abbreviations[[currentStim]])
}

stims <- c("DMSO", "Spike 1", "Spike 2", "NCAP", "VEMP")
stim_plots <- lapply(stims, plot_stim_contour_plot)
names(stim_plots) <- stims
# Unstratified PolyF plot
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc_polyf <- scale_colour_gradientn(colours = myPalette(100), limits=c(1, 6))
polyf_plot_unstrat <- gg_themes +
  geom_point(aes(colour=cd4_compass_subsets_w_umap[,"cytokine_degree"]), shape=20, alpha=0.8, size=pointSize) +
  sc_polyf +
  ggtitle("PolyF")
# Extract legends
mfi_legend <- as_ggplot(get_legend(mfi_plots_unstrat$TNFa +
                                     theme(legend.position="bottom",
                                             legend.title.align = 0.5) +
                                     labs(colour="Scaled Expression") +
                                     guides(colour=guide_colorbar(title.position = "top"))))
polyf_legend <- as_ggplot(get_legend(polyf_plot_unstrat +
                                       theme(legend.position="bottom",
                                             legend.title.align = 0.5) +
                                       labs(colour="PolyF") +
                                       guides(colour=guide_colorbar(title.position = "top"))))
mem_legend <- as_ggplot(get_legend(mem_plot_colored_by_gate +
                                     theme(legend.position = "right",
                                           legend.title.align = 0.5) +
                                     guides(colour = guide_legend(override.aes = list(size=9))) +
                                     labs(colour="Memory")))
bool_legend <- as_ggplot(get_legend(hladr_bool_plot +
                                     theme(legend.position = "right",
                                           legend.title.align = 0.5) +
                                     guides(colour = guide_legend(override.aes = list(size=9))) +
                                     labs(colour="Gate Membership")))

Put it all together

print(
  # Top row
  (hosp_contour_plot | nonhosp_contour_plot | plot_spacer() |
  stim_plots$`Spike 1` | stim_plots$`Spike 2` | plot_spacer() |
  polyf_plot_unstrat | plot_spacer() | mfi_plots_unstrat$IFNg) /
  # Second row
  (mem_plot_colored_by_gate | plot_spacer() | plot_spacer() |
  stim_plots$NCAP | stim_plots$VEMP | plot_spacer() |
  mfi_plots_unstrat$TNFa | mfi_plots_unstrat$IL2 | mfi_plots_unstrat$`IL4/5/13`) /
  # Third row
  (hladr_bool_plot | cd38_bool_plot | plot_spacer() |
  stim_plots$DMSO | plot_spacer() | plot_spacer() |
  mfi_plots_unstrat$IL17a | mfi_plots_unstrat$CD107a | mfi_plots_unstrat$CD154 )
)

if(save_output) {
  png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP.png",
                              date)),
      width=18, height=9, units="in", res=300)
    print(
      # Top row
      (hosp_contour_plot | nonhosp_contour_plot | plot_spacer() |
      stim_plots$`Spike 1` | stim_plots$`Spike 2` | plot_spacer() |
      polyf_plot_unstrat | plot_spacer() | mfi_plots_unstrat$IFNg) /
      # Second row
      (mem_plot_colored_by_gate | plot_spacer() | plot_spacer() |
      stim_plots$NCAP | stim_plots$VEMP | plot_spacer() |
      mfi_plots_unstrat$TNFa | mfi_plots_unstrat$IL2 | mfi_plots_unstrat$`IL4/5/13`) /
      # Third row
      (hladr_bool_plot | cd38_bool_plot | plot_spacer() |
      stim_plots$DMSO | plot_spacer() | plot_spacer() |
      mfi_plots_unstrat$IL17a | mfi_plots_unstrat$CD107a | mfi_plots_unstrat$CD154 )
    )
  dev.off()
  
  png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP_MFI_Legend.png",
                            date)),
    width=3, height=1.5, units="in", res=300)
  print(mfi_legend)
  dev.off()
  
  png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP_PolyF_Legend.png",
                            date)),
    width=3, height=1.5, units="in", res=300)
  print(polyf_legend)
  dev.off()
  
  png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP_Memory_Legend.png",
                            date)),
    width=2, height=3, units="in", res=300)
  print(mem_legend)
  dev.off()
  
  png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP_GateMembership_Legend.png",
                          date)),
  width=2, height=3, units="in", res=300)
  print(bool_legend)
  dev.off()
  
  # cairo_pdf(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP.cairo.pdf",
  #                             date)),
  #     width=12, height=9, onefile = TRUE, bg = "transparent", family = "Arial")
  # print(x)
  # dev.off()
}
## png 
##   2